This document provides a data analysis of the data sent by WSUP on the Kanyama toilet evaluation. It covers data cleaning, organization and visualization.
# Import the second worksheet of the survey workbook, skipping its first row.
Kanyama.raw <- read_xlsx("KANYAMA.xlsx", sheet = 2, skip = 1)

# Drop columns 1-23 and 27 by position, then keep only the rows in which the
# respondent agreed to take part in the survey.
Kanyama <- Kanyama.raw %>%
  select(-c(1:23, 27)) %>%
  filter(`Are you willing to participate?` == "Yes")

# Show the structure of the untouched raw import.
str(Kanyama.raw)
## Classes 'tbl_df', 'tbl' and 'data.frame': 16065 obs. of 292 variables:
## $ Deployment : chr "Data Entry For Survey" "Data Entry For Survey" "Data Entry For Survey" "Data Entry For Survey" ...
## $ Enumerator : chr "#########" "#########" "#########" "#########" ...
## $ Status : chr "Final" "Final" "Final" "Final" ...
## $ Response Code : chr "Ted mwitwa-" "neduser0217-5HXV2A" "neduser0317-5HXVY5" "nedusera001917-5HXVBN" ...
## $ Drafted On : POSIXct, format: "2009-01-01 06:36:13" "2017-03-20 08:44:24" ...
## $ Submitted On : POSIXct, format: "2009-01-01 07:21:46" "2017-03-20 09:05:14" ...
## $ Approval Level : logi NA NA NA NA NA NA ...
## $ IP Address : chr "41.72.102.114" "41.72.102.200" "41.72.102.51" "41.72.102.63" ...
## $ ENUMERATORS NAME : chr "#########" "#########" "#########" "#########" ...
## $ ENUMERATORS NAME (Other (please specify)) - specify : chr "#########" "#########" "#########" "#########" ...
## $ ENUMERATORS NAME (Other (please specify)) - specify2 : chr "#########" "#########" "#########" "#########" ...
## $ SUPERVISOR’S NAME : chr "#########" "#########" "#########" "#########" ...
## $ NAME OF PERSON TAKING THE TOILET READINGS : chr "#########" "#########" "#########" "#########" ...
## $ NAME OF PERSON TAKING THE TOILET READINGS (Other (please specify)) - specify : chr "#########" "#########" "#########" "#########" ...
## $ IS THERE AN ELIGIBLE CANDIDATE TO INTERVIEW? : chr "No" "Yes" "Yes" "Yes" ...
## $ EXPLAIN REASON FOR NOT FINDING RESPONDENT : chr "Its a seventh day church and there's no care taker" NA NA NA ...
## $ Record plot number : chr "Unknown" "47/33" "47\\31" "47/32" ...
## $ Record date for next visit : POSIXct, format: "2009-01-01" NA ...
## $ Record date for next visit (Location Answered) (latitude) : num -15.4 NA NA NA NA ...
## $ Record date for next visit (Location Answered) (longitude) : num 28.3 NA NA NA NA ...
## $ Record date for next visit (Location Answered - accuracy) : num 6.1 NA NA NA NA ...
## $ Record date for next visit (Location Answered - altitude) : num 1249 NA NA NA NA ...
## $ NUMBER OF TIMES THAT YOU HAVE VISITED THE PLACE : num 2 NA NA NA NA NA NA NA NA NA ...
## $ DATE OF INTERVIEW : POSIXct, format: "2017-10-16" "2017-03-20" ...
## $ DATE OF INTERVIEW (Time Answered) : POSIXct, format: "2009-01-01 06:36:48" "2017-03-20 08:45:07" ...
## $ Are you willing to participate? : chr NA "Yes" "Yes" "Yes" ...
## $ Reasons for refusing to participate? : chr NA NA NA NA ...
## $ DESCRIPTION OF RESPONDENT: Landlord - How long have you stayed/been associated with this plot? (magnitude) : num NA 1 NA NA 5 NA NA 1 11 47 ...
## $ DESCRIPTION OF RESPONDENT: Landlord - How long have you stayed/been associated with this plot? (units) : chr NA NA NA NA ...
## $ DESCRIPTION OF RESPONDENT: Landlord - SEX : num NA 2 NA NA 2 1 NA 2 2 2 ...
## $ DESCRIPTION OF RESPONDENT: Caretaker - How long have you stayed/been associated with this plot? (magnitude) : num NA NA NA NA NA NA NA NA NA NA ...
## $ DESCRIPTION OF RESPONDENT: Caretaker - How long have you stayed/been associated with this plot? (units) : chr NA NA NA NA ...
## $ DESCRIPTION OF RESPONDENT: Caretaker - SEX : num NA NA NA NA NA NA NA NA NA NA ...
## $ DESCRIPTION OF RESPONDENT: Tenant - How long have you stayed/been associated with this plot? (magnitude) : num NA NA 4 16 NA NA NA NA NA NA ...
## $ DESCRIPTION OF RESPONDENT: Tenant - How long have you stayed/been associated with this plot? (units) : chr NA NA "Years" "Years" ...
## $ DESCRIPTION OF RESPONDENT: Tenant - SEX : num NA NA 1 2 NA NA 2 NA NA NA ...
## $ RECORD TYPE OF PROPERTY : chr NA "Residential Plot" "Residential Plot" "Residential Plot" ...
## $ RECORD TYPE OF PROPERTY (Other (please specify)) - specify : chr NA NA NA NA ...
## $ SELECT ZONE : chr NA "ZONE 5" "ZONE 5" "ZONE 5" ...
## $ SELECT ZONE (Other (please specify)) - specify : chr NA NA NA NA ...
## $ SELECT ZONE (Other (please specify)) - specify3 : logi NA NA NA NA NA NA ...
## $ SELECT ZONE (Other (please specify)) - specify4 : logi NA NA NA NA NA NA ...
## $ SELECT ZONE (Other (please specify)) - specify5 : logi NA NA NA NA NA NA ...
## $ SELECT ZONE (Other (please specify)) - specify6 : logi NA NA NA NA NA NA ...
## $ SELECT ZONE SECTION : chr NA "A" "A" "A" ...
## $ SELECT ZONE SECTION (Other (please specify)) - specify : chr NA NA NA NA ...
## $ SELECT ZONE SECTION (Other (please specify)) - specify7 : chr NA NA NA NA ...
## $ SELECT ZONE SECTION (Other (please specify)) - specify8 : chr NA NA NA NA ...
## $ 1.2 : chr NA "Sunrise" "Salad house" "Sunrise" ...
## $ 1.3 : chr NA "47/33" "47\\31" "47/32" ...
## $ 1.3 (Don't Know) : logi NA NA NA NA NA NA ...
## $ 1.4 : num NA 4 4 13 4 4 7 2 7 4 ...
## $ 1.5 : num NA 25 22 38 14 12 6 8 19 8 ...
## $ 1.5 (Don't Know) : logi NA NA NA NA NA NA ...
## $ 1.6 - 1 - 1.5.1 : num NA NA NA NA NA NA NA NA NA NA ...
## $ 1.6 - 2 - 1.5.1 : num NA NA NA NA NA 1 NA NA NA NA ...
## $ 1.6 - 3 - 1.5.1 : num NA NA NA NA NA NA NA 1 NA NA ...
## $ 1.6 - 4 - 1.5.1 : num NA NA NA NA NA NA NA NA NA NA ...
## $ 1.6 - 5 - 1.5.1 : num NA NA NA NA NA NA NA NA NA NA ...
## $ 1.6 - 6 - 1.5.1 : num NA NA NA NA NA NA NA NA NA NA ...
## $ 1.6 - 7 - 1.5.1 : num NA 1 NA 1 1 NA 1 NA 1 1 ...
## $ 1.6 - 8 - 1.5.1 : num NA NA NA NA NA NA NA NA NA NA ...
## $ 1.6 - 9 - 1.5.1 : num NA NA NA NA NA 0 1 NA NA NA ...
## $ Total number of toilets{0} : logi NA NA NA NA NA NA ...
## $ 1.6.1 Do you think there is space on this plot to construct another toilet?: Yes - If Yes how many more? If No, why is that the case?: chr NA NA "4" NA ...
## $ 1.6.1 Do you think there is space on this plot to construct another toilet?: No - If Yes how many more? If No, why is that the case? : chr NA "There are ground stones making it impossible to dig another toilet" NA "No" ...
## $ 1.6.2 : chr NA "Other (please specify)" "Individual connection" "Kiosk" ...
## $ 1.6.2 (Other (please specify)) - specify : chr NA "Lusaka water connected tap" NA NA ...
## $ 1.7 : chr "#########" "#########" "#########" "#########" ...
## $ 1.7.1 : chr "#########" "#########" "#########" "#########" ...
## $ 1.8 : chr NA "Yes" "No" "No" ...
## $ 1.9 (latitude) : num NA NA NA -15.4 -15.4 ...
## $ 1.9 (longitude) : num NA NA NA 28.2 28.2 ...
## $ 1.9 (administrative region) : chr NA NA NA "Lusaka, Lusaka, Zambia" ...
## $ 1.9 (accuracy) : num NA NA NA 3.9 3.9 ...
## $ 1.9 (altitude) : num NA NA NA 1290 1269 ...
## $ 1.9 (Time Answered) : POSIXct, format: NA NA ...
## $ 1.9 (Location Answered) (latitude) : num NA NA NA -15.4 -15.4 ...
## $ 1.9 (Location Answered) (longitude) : num NA NA NA 28.2 28.2 ...
## $ 1.9 (Location Answered - accuracy) : num NA NA NA 3.9 3.9 ...
## $ 1.9 (Location Answered - altitude) : num NA NA NA 1290 1269 ...
## $ How many people use the toilets on this plot?: Children - Male : num NA 4 4 10 NA 5 2 NA 4 1 ...
## $ How many people use the toilets on this plot?: Children - Female : num NA 13 3 11 6 4 1 4 5 3 ...
## $ How many people use the toilets on this plot?: Adults - Male : num NA 3 8 7 4 4 2 2 6 3 ...
## $ How many people use the toilets on this plot?: Adults - Female : num NA 4 7 10 3 5 1 2 4 5 ...
## $ 2.1D : num NA 46 27 30 50 39 30 27 32 NA ...
## $ What is the designation of the respondent? : chr NA "1" "Bus conductor" "Tenant" ...
## $ Where do you dispose your solid wastes? : chr NA "Bin which is collected CBEs" "Bin which is self disposed" "Rubbish Pit" ...
## $ Where do you dispose your solid wastes? (Other (please specify)) - specify : chr NA NA NA NA ...
## $ 3.4 : chr NA "No" "No" "No" ...
## $ 3.4 (Don't Know) : logi NA NA NA NA NA NA ...
## $ 3.5 : chr NA NA NA NA ...
## $ 3.5 (Other (please specify)) - specify : chr NA NA NA NA ...
## $ 3.5 (Other (please specify)) - specify9 : chr NA NA NA NA ...
## $ 3.510 : chr NA NA NA NA ...
## $ 3.5 (Other (please specify)) - specify11 : chr NA NA NA NA ...
## $ 3.5 (Other (please specify)) - specify12 : chr NA NA NA NA ...
## $ How much did you pay for the upgrades in ZMW? : num NA NA NA NA NA NA 200 NA NA NA ...
## $ How did you finance for the upgrades? : chr NA NA NA NA ...
## [list output truncated]
The first observation is that there are a lot of attributes, and most of them exist only for special cases in which the interviewee gave a very specific answer. Such columns apply to just a small part of the dataset, so for the majority of answers they are filled with NA’s.
The first operation was to remove any personal identification (e.g., landlord name, interviewee name, enumerator name) and to remove every row in which the interviewee declined to answer the form, since in those rows all attributes after the question about participation are NA’s.
It was necessary to rename some of the columns because the attributes are sub-items inside a question (e.g., Record GPS is a question composed of multiple columns, such as Latitude, Longitude, etc.), and their names are either just numerical identifiers or too confusing to link with what they represent.
# Replace the coded survey column names with descriptive ones.
# Every entry reads "new name" = `original column`; the entries are listed in
# the order of the original question codes. Renaming does not move columns.
Kanyama <- rename(
  Kanyama,
  # 1.x columns
  "Region" = `1.2`,
  "Record_plot_number" = `1.3`,
  "Families_on_the_plot" = `1.4`,
  "People_on_the_plot" = `1.5`,
  "VIP toilets" = `1.6 - 1 - 1.5.1`,
  "ECOSAN toilets" = `1.6 - 2 - 1.5.1`,
  "Inside waterflush toilets" = `1.6 - 3 - 1.5.1`,
  "Outside waterflush toilets" = `1.6 - 4 - 1.5.1`,
  "Poor flush Inside" = `1.6 - 5 - 1.5.1`,
  "Poor flush Outside" = `1.6 - 6 - 1.5.1`,
  "Lined Pit latrine" = `1.6 - 7 - 1.5.1`,
  "Unlined Pit latrine" = `1.6 - 8 - 1.5.1`,
  "Disused/Buried" = `1.6 - 9 - 1.5.1`,
  "Water source (fetch)" = `1.6.2`,
  "Landlord live in the plot?" = `1.8`,
  # 3.x columns
  "Upgraded toilet recently?" = `3.4`,
  "Emptied the toilet before?" = `3.7`,
  "Last time emptied" = `3.7.1`,
  "Who emptied?" = `3.7.2`,
  # 4.x columns
  "Interface Layout" = `4.3 INTERFACE`,
  "Width" = `4.4`,
  "Diameter" = `4.416`,
  "Length" = `4.5`,
  "Height" = `4.6`,
  "Perception of the fill level" = `4.7`,
  "Is emptying feasible?" = `4.8`,
  "Is washing hand basin present?" = `4.9`
)
One step towards a standard dataset is to remove the attributes that are not used by most algorithms; most of these are too specific to be useful in a generalization process.
Another operation was to keep only the data of the first toilet observed on each plot (the columns up to “Is there another toilet to observe”), because the additional toilets of the cases with two or more are going to be treated afterwards.
# Reduce a survey data frame to the columns kept for the analysis.
#
# Two things happen:
#   1. Free-text "Other (please specify)" columns, "(Don't Know)" flags, photo
#      columns and several other question groups are dropped by name/prefix.
#   2. If at least one row answers "No" to `Is there another toilet to observe`,
#      every column located after that question is dropped as well.
#
# Arguments:
#   df  Data frame (or tibble) holding the renamed survey columns. The columns
#       removed by exact name must exist, otherwise select() raises an error.
#
# Returns: the reduced data frame. No rows are removed.
simplify <- function(df){
  df.reduced <- df %>%
    select(-starts_with("DESCRIPTION OF RESPONDENT:"),
           -starts_with("SELECT ZONE (Other"),
           -starts_with("SELECT ZONE SECTION (Other"),
           -`RECORD TYPE OF PROPERTY (Other (please specify)) - specify`,
           -`1.3 (Don't Know)`,
           -`1.5 (Don't Know)`,
           -`1.6.1 Do you think there is space on this plot to construct another toilet?: No - If Yes how many more? If No, why is that the case?`,
           -`1.6.2 (Other (please specify)) - specify`,
           -`1.7`,
           -`1.7.1`,
           -`2.1D`,
           -`What is the designation of the respondent?`,
           -`3.4 (Don't Know)`,
           -starts_with("3.5"),
           -starts_with("What do you want to upgrade your toilet"),
           -starts_with("What happens when the toilet gets full? (Other"),
           -ends_with("Months - Age"),
           -`3.7 (Other (please specify)) - specify`,
           -starts_with("3.7.1 ("),
           -starts_with("How did you know about the service of emptying your toilet"),
           -starts_with("How would you rate your level of satisfaction with the service you received from the emptiers?"),
           -starts_with("3.7.3"),
           -starts_with("Was the fee you paid affordable? ("),
           -starts_with("How often do you empty your toilet? ("),
           -starts_with("3.8"),
           -starts_with("4.1"),
           -starts_with("4.2"),
           -starts_with("4.3 SLAB"),
           -starts_with("4.3 INTERFACE ("),
           -starts_with("CONTAINMENT/SUBSTRUCTURE ("),
           -starts_with("Record the observed shape of the substructure/containment ("),
           -starts_with("TAKE PHOTO OF"),
           -starts_with("4.8 (Don't Know)")
    )

  # na.rm = TRUE: without it, a column holding NA but no "No" makes any()
  # return NA and the `if` fails with "missing value where TRUE/FALSE needed".
  another.toilet <- df.reduced[["Is there another toilet to observe"]]
  if (any(another.toilet == "No", na.rm = TRUE)) {
    # Keep everything up to and including the first matching marker column.
    # Position-based subsetting also copes with the marker being the last
    # column, where `(marker + 1):ncol()` would count backwards.
    marker <- grep("Is there another", colnames(df.reduced))[1]
    df.reduced <- df.reduced[seq_len(marker)]
  }

  df.reduced
}
# Apply the column reduction to the full survey and print per-column summaries.
Kanyama.reduced <- Kanyama %>% simplify()
summary(Kanyama.reduced)
## DATE OF INTERVIEW DATE OF INTERVIEW (Time Answered)
## Min. :2017-03-01 00:00:00 Min. :2017-03-20 08:45:07
## 1st Qu.:2017-04-04 00:00:00 1st Qu.:2017-04-04 10:20:53
## Median :2017-10-14 00:00:00 Median :2017-10-14 07:09:19
## Mean :2017-07-18 21:02:33 Mean :2017-07-19 02:14:03
## 3rd Qu.:2017-11-01 00:00:00 3rd Qu.:2017-10-31 19:39:56
## Max. :2017-12-04 00:00:00 Max. :2017-11-16 14:32:52
## NA's :137 NA's :137
## Are you willing to participate? RECORD TYPE OF PROPERTY
## Length:13552 Length:13552
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## SELECT ZONE SELECT ZONE SECTION Region
## Length:13552 Length:13552 Length:13552
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Record_plot_number Families_on_the_plot People_on_the_plot
## Length:13552 Min. :0.000e+00 Min. : 0.00
## Class :character 1st Qu.:1.000e+00 1st Qu.: 7.00
## Mode :character Median :3.000e+00 Median : 11.00
## Mean :7.450e+05 Mean : 13.34
## 3rd Qu.:4.000e+00 3rd Qu.: 17.00
## Max. :9.746e+09 Max. :1580.00
## NA's :470 NA's :657
## VIP toilets ECOSAN toilets Inside waterflush toilets
## Min. :0.000 Min. :0.000 Min. : 0.000
## 1st Qu.:1.000 1st Qu.:0.000 1st Qu.: 1.000
## Median :1.000 Median :1.000 Median : 1.000
## Mean :1.035 Mean :0.693 Mean : 1.192
## 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.: 1.000
## Max. :5.000 Max. :1.000 Max. :36.000
## NA's :13177 NA's :13438 NA's :12798
## Outside waterflush toilets Poor flush Inside Poor flush Outside
## Min. :0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.:1.00 1st Qu.: 1.000 1st Qu.: 1.000
## Median :1.00 Median : 1.000 Median : 1.000
## Mean :1.14 Mean : 0.984 Mean : 1.068
## 3rd Qu.:1.00 3rd Qu.: 1.000 3rd Qu.: 1.000
## Max. :8.00 Max. :11.000 Max. :11.000
## NA's :13009 NA's :13187 NA's :10919
## Lined Pit latrine Unlined Pit latrine Disused/Buried
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 0.000
## Median : 1.000 Median : 1.000 Median : 1.000
## Mean : 1.059 Mean : 1.014 Mean : 0.672
## 3rd Qu.: 1.000 3rd Qu.: 1.000 3rd Qu.: 1.000
## Max. :32.000 Max. :12.000 Max. :11.000
## NA's :5410 NA's :12349 NA's :9362
## Total number of toilets{0}
## Mode :logical
## FALSE:22
## TRUE :6636
## NA's :6894
##
##
##
## 1.6.1 Do you think there is space on this plot to construct another toilet?: Yes - If Yes how many more? If No, why is that the case?
## Length:13552
## Class :character
## Mode :character
##
##
##
##
## Water source (fetch) Landlord live in the plot? 1.9 (latitude)
## Length:13552 Length:13552 Min. :-32.36
## Class :character Class :character 1st Qu.:-15.44
## Mode :character Mode :character Median :-15.43
## Mean :-15.43
## 3rd Qu.:-15.43
## Max. :-15.42
## NA's :1012
## 1.9 (longitude) 1.9 (administrative region) 1.9 (accuracy)
## Min. : 9.492 Length:13552 Min. : 1.400
## 1st Qu.:28.229 Class :character 1st Qu.: 3.900
## Median :28.238 Mode :character Median : 3.900
## Mean :28.238 Mean : 3.979
## 3rd Qu.:28.248 3rd Qu.: 3.900
## Max. :28.320 Max. :96.000
## NA's :1012 NA's :1139
## 1.9 (altitude) 1.9 (Time Answered)
## Min. :-1562 Min. :2017-03-20 09:01:29
## 1st Qu.: 1269 1st Qu.:2017-04-05 07:17:32
## Median : 1275 Median :2017-10-16 13:00:48
## Mean : 1275 Mean :2017-07-23 23:06:55
## 3rd Qu.: 1282 3rd Qu.:2017-11-01 09:59:35
## Max. : 1668 Max. :2017-11-16 14:35:04
## NA's :1141 NA's :1012
## 1.9 (Location Answered) (latitude) 1.9 (Location Answered) (longitude)
## Min. :-15.47 Min. :28.21
## 1st Qu.:-15.44 1st Qu.:28.23
## Median :-15.43 Median :28.24
## Mean :-15.43 Mean :28.24
## 3rd Qu.:-15.43 3rd Qu.:28.25
## Max. :-15.42 Max. :28.31
## NA's :1020 NA's :1020
## 1.9 (Location Answered - accuracy) 1.9 (Location Answered - altitude)
## Min. : 1.100 Min. :-616.1
## 1st Qu.: 3.900 1st Qu.:1268.6
## Median : 3.900 Median :1275.3
## Mean : 3.959 Mean :1274.5
## 3rd Qu.: 3.900 3rd Qu.:1281.5
## Max. :96.000 Max. :1667.5
## NA's :1020 NA's :1022
## How many people use the toilets on this plot?: Children - Male
## Min. : 0.000
## 1st Qu.: 1.000
## Median : 2.000
## Mean : 4.266
## 3rd Qu.: 4.000
## Max. :2971.000
## NA's :500
## How many people use the toilets on this plot?: Children - Female
## Min. : 0.000
## 1st Qu.: 1.000
## Median : 3.000
## Mean : 5.148
## 3rd Qu.: 4.000
## Max. :5632.000
## NA's :471
## How many people use the toilets on this plot?: Adults - Male
## Min. : 0.000
## 1st Qu.: 2.000
## Median : 3.000
## Mean : 4.492
## 3rd Qu.: 5.000
## Max. :1000.000
## NA's :230
## How many people use the toilets on this plot?: Adults - Female
## Min. : 0.000
## 1st Qu.: 2.000
## Median : 3.000
## Mean : 4.839
## 3rd Qu.: 5.000
## Max. :1000.000
## NA's :193
## Where do you dispose your solid wastes?
## Length:13552
## Class :character
## Mode :character
##
##
##
##
## Where do you dispose your solid wastes? (Other (please specify)) - specify
## Length:13552
## Class :character
## Mode :character
##
##
##
##
## Upgraded toilet recently? How much did you pay for the upgrades in ZMW?
## Length:13552 Min. : 0
## Class :character 1st Qu.: 0
## Mode :character Median : 300
## Mean : 1319
## 3rd Qu.: 1700
## Max. :25000
## NA's :12725
## How did you finance for the upgrades?
## Length:13552
## Class :character
## Mode :character
##
##
##
##
## How did you finance for the upgrades? (Other (please specify)) - specify
## Length:13552
## Class :character
## Mode :character
##
##
##
##
## Age of toilet: Years - Age Age of toilet 2: Years - Age
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.: 0.000
## Median : 5.000 Median : 0.000
## Mean : 6.053 Mean : 2.488
## 3rd Qu.: 8.000 3rd Qu.: 3.000
## Max. :400.000 Max. :65.000
## NA's :1060 NA's :11325
## Age of toilet 3: Years - Age What happens when the toilet gets full?
## Min. : 0.000 Length:13552
## 1st Qu.: 0.000 Class :character
## Median : 0.000 Mode :character
## Mean : 0.812
## 3rd Qu.: 0.000
## Max. :50.000
## NA's :12005
## Emptied the toilet before? Last time emptied Who emptied?
## Length:13552 Length:13552 Length:13552
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## 3.7.2 (Other (please specify)) - specify Was the fee you paid affordable?
## Length:13552 Length:13552
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## How often do you empty your toilet?
## Length:13552
## Class :character
## Mode :character
##
##
##
##
## When next do you think your toilet will be due for emptying?: Months from now - Period
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 1.843
## 3rd Qu.: 2.000
## Max. :1015.000
## NA's :7767
## When next do you think your toilet will be due for emptying?: Years from now - Period
## Min. : 0.000
## 1st Qu.: 1.000
## Median : 3.000
## Mean : 4.571
## 3rd Qu.: 5.000
## Max. :2017.000
## NA's :4420
## Interface Layout CONTAINMENT/SUBSTRUCTURE
## Length:13552 Length:13552
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Condition of the components: Roof - Score Condition
## Min. :1.000
## 1st Qu.:1.000
## Median :3.000
## Mean :2.547
## 3rd Qu.:4.000
## Max. :5.000
## NA's :203
## Condition of the components: Wall - Score Condition
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.473
## 3rd Qu.:4.000
## Max. :5.000
## NA's :153
## Condition of the components: Floor - Score Condition
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.468
## 3rd Qu.:4.000
## Max. :5.000
## NA's :168
## Condition of the components: Interface - Score Condition
## Min. :1.000
## 1st Qu.:3.000
## Median :3.000
## Mean :3.443
## 3rd Qu.:4.000
## Max. :5.000
## NA's :175
## Condition of the components: Substructure/Containment - Score Condition
## Min. :1.000
## 1st Qu.:3.000
## Median :3.000
## Mean :3.443
## 3rd Qu.:4.000
## Max. :5.000
## NA's :234
## Record the observed shape of the substructure/containment
## Length:13552
## Class :character
## Mode :character
##
##
##
##
## Width Diameter Length
## Min. : 0.0 Min. : 1.0 Min. : 0.0
## 1st Qu.: 120.0 1st Qu.: 4.0 1st Qu.: 2.4
## Median : 170.0 Median :169.0 Median : 140.0
## Mean : 188.5 Mean :155.9 Mean : 131.7
## 3rd Qu.: 208.0 3rd Qu.:221.0 3rd Qu.: 238.0
## Max. :169210.0 Max. :600.0 Max. :3658.0
## NA's :493 NA's :13336 NA's :503
## Is height of sludge measurable? Height
## Length:13552 Min. : 0.00
## Class :character 1st Qu.: 1.40
## Mode :character Median : 2.30
## Mean : 65.11
## 3rd Qu.: 100.00
## Max. :2100.00
## NA's :3590
## Explain reason for not being able to take the reading of the height
## Length:13552
## Class :character
## Mode :character
##
##
##
##
## Perception of the fill level Is emptying feasible?
## Min. :1.000 Length:13552
## 1st Qu.:2.000 Class :character
## Median :3.000 Mode :character
## Mean :2.821
## 3rd Qu.:3.000
## Max. :5.000
## NA's :877
## Is washing hand basin present? Any overflow/flooding?
## Length:13552 Length:13552
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## Is the toilet easily accessible to the following?: Vacuum Tanker - Yes
## Mode :logical
## FALSE:476
## TRUE :9945
## NA's :3131
##
##
##
## Is the toilet easily accessible to the following?: Vacuum Tanker - No
## Mode :logical
## FALSE:178
## TRUE :3477
## NA's :9897
##
##
##
## Is the toilet easily accessible to the following?: Light Truck - Yes
## Mode :logical
## FALSE:262
## TRUE :11315
## NA's :1975
##
##
##
## Is the toilet easily accessible to the following?: Light Truck - No
## Mode :logical
## FALSE:116
## TRUE :2051
## NA's :11385
##
##
##
## Is the toilet easily accessible to the following?: Push Cart - Yes
## Mode :logical
## FALSE:30
## TRUE :13098
## NA's :424
##
##
##
## Is the toilet easily accessible to the following?: Push Cart - No
## Mode :logical
## FALSE:94
## TRUE :266
## NA's :13192
##
##
##
## Is the toilet easily accessible to the following people?: Persons with dissability - Yes
## Mode :logical
## FALSE:310
## TRUE :9406
## NA's :3836
##
##
##
## Is the toilet easily accessible to the following people?: Persons with dissability - No
## Mode :logical
## FALSE:150
## TRUE :4055
## NA's :9347
##
##
##
## Is the toilet easily accessible to the following people?: Children - Yes
## Mode :logical
## FALSE:142
## TRUE :11974
## NA's :1436
##
##
##
## Is the toilet easily accessible to the following people?: Children - No
## Mode :logical
## FALSE:188
## TRUE :1489
## NA's :11875
##
##
##
## Is the toilet easily accessible to the following people?: Women at night - Yes
## Mode :logical
## FALSE:282
## TRUE :9215
## NA's :4055
##
##
##
## Is the toilet easily accessible to the following people?: Women at night - No
## Mode :logical
## FALSE:351
## TRUE :4185
## NA's :9016
##
##
##
## Is there another toilet to observe
## Length:13552
## Class :character
## Mode :character
##
##
##
##
Some attributes can be unified into a single column. This happens because some answers are split into a ‘Yes’ column and a ‘No’ column, each holding a boolean ‘True’ or ‘False’. Each pair can be transformed into a single column where ‘True’ equals ‘Yes’ and ‘False’ equals ‘No’. A function was written to do that process automatically, following a pattern.
# Merge a "Yes"/"No" pair of logical columns into one logical column.
#
# Arguments:
#   df       Data frame (or tibble) to modify.
#   column1  Logical vector of the "Yes" answers (TRUE = answered yes).
#   column2  Logical vector of the "No" answers (TRUE = answered no).
#   name     Name of the merged column. Every existing column whose name
#            starts with `name` (case-insensitively, as tidyselect's
#            starts_with() does by default) is removed first.
#
# Returns: `df` without the columns starting with `name`, plus a new last
# column `name` that is TRUE where column1 is TRUE, FALSE where column1 is not
# TRUE but column2 is TRUE, and NA otherwise.
grouping.columns <- function(df, column1, column2, name){
  stopifnot(length(column1) == length(column2),
            length(column1) %in% c(1L, nrow(df)))

  # Test for TRUE in an NA-safe way. The previous `column1 != T & column2 == T`
  # evaluated to NA whenever the "Yes" column was missing, so a plain "No"
  # answer (Yes = NA, No = TRUE) was recorded as NA instead of FALSE.
  said.yes <- !is.na(column1) & column1 == TRUE
  said.no <- !is.na(column2) & column2 == TRUE

  merged <- rep(NA, length(said.yes))
  merged[said.no] <- FALSE
  merged[said.yes] <- TRUE  # "Yes" takes precedence when both are TRUE

  keep <- !startsWith(tolower(names(df)), tolower(name))
  res <- df[keep]
  res[[name]] <- merged
  res
}
# Collapse every "<question> - Yes" / "<question> - No" pair of the first
# toilet into one logical column named after the question. The questions are
# processed in the listed order, which is the order of the new columns.
accessibility.questions <- c(
  "Is the toilet easily accessible to the following people?: Children",
  "Is the toilet easily accessible to the following people?: Persons with dissability",
  "Is the toilet easily accessible to the following people?: Women at night",
  "Is the toilet easily accessible to the following?: Vacuum Tanker",
  "Is the toilet easily accessible to the following?: Light Truck",
  "Is the toilet easily accessible to the following?: Push Cart"
)
for (question in accessibility.questions) {
  Kanyama.reduced <- grouping.columns(
    Kanyama.reduced,
    Kanyama.reduced[[paste0(question, " - Yes")]],
    Kanyama.reduced[[paste0(question, " - No")]],
    question
  )
}
rm(accessibility.questions, question)
The chunk below removes the multiple-toilet question column, because the simplify function has already reduced the dataset to the columns of the first toilet only.
# The marker question is no longer needed once the later columns are gone.
Kanyama.reduced <- Kanyama.reduced %>%
  select(-`Is there another toilet to observe`)
Some observations have more than one toilet observed at the same time, which makes the form repeat the questions for every additional toilet analyzed. This exception extends the number of columns in the dataset, even though the majority is formed by single-toilet forms. To solve this problem it is necessary to separate the cases with multiple toilets and make new rows from them, expanding vertically instead of horizontally.
# Rows for which a second, respectively a third, toilet was observed.
more.than.1.toilet <- filter(Kanyama, `Is there another toilet to observe` == "Yes")
more.than.2.toilet <- filter(Kanyama, `Is there a third toilet to observe` == "Yes")
The process is summarized as:
# Build `test3`: one row per second toilet, laid out like Kanyama.reduced.
more.than.1.toilet <- simplify(more.than.1.toilet)

index_beg <- grep("Is there another", colnames(more.than.1.toilet))
index_end <- grep("Is there a third", colnames(more.than.1.toilet))
index_sub <- grep("Inter", colnames(more.than.1.toilet))
# NOTE(review): index_beg, index_end and index_sub are computed but never used
# below; the slices 1:55 and 87:117 are hard-coded positions. Presumably the
# indices were meant to derive these ranges - confirm against the real column
# layout before replacing the constants.

# Columns 1:55 followed by columns 87:117, bound side by side.
test3 <- cbind(more.than.1.toilet[, 1:55], more.than.1.toilet[, 87:117])

# Numeric suffixes carried by the "- Yes" / "- No" columns of each question,
# listed in the order in which the merged columns are created.
second.toilet.suffix <- list(
  "Is the toilet easily accessible to the following people?: Children" = c(61, 62),
  "Is the toilet easily accessible to the following people?: Persons with dissability" = c(59, 60),
  "Is the toilet easily accessible to the following people?: Women at night" = c(63, 64),
  "Is the toilet easily accessible to the following?: Vacuum Tanker" = c(53, 54),
  "Is the toilet easily accessible to the following?: Light Truck" = c(55, 56),
  "Is the toilet easily accessible to the following?: Push Cart" = c(57, 58)
)
for (question in names(second.toilet.suffix)) {
  suffix <- second.toilet.suffix[[question]]
  test3 <- grouping.columns(
    test3,
    test3[[paste0(question, " - Yes", suffix[1])]],
    test3[[paste0(question, " - No", suffix[2])]],
    question
  )
}
rm(second.toilet.suffix, question, suffix)

test3 <- select(test3, -`Is there a third toilet to observe`)
The process with 3 toilets is similar to the 2-toilet case; only the column indices change.
# Build `test6`: one row per third toilet, laid out like Kanyama.reduced.
more.than.2.toilet <- simplify(more.than.2.toilet)

# Columns 1:55 followed by everything from column 118 onwards.
# NOTE(review): both positions are hard-coded; confirm they still match the
# column layout if the survey export changes.
test6 <- cbind(
  more.than.2.toilet[, 1:55],
  more.than.2.toilet[, 118:ncol(more.than.2.toilet)]
)

# Numeric suffixes carried by the "- Yes" / "- No" columns of each question,
# listed in the order in which the merged columns are created.
third.toilet.suffix <- list(
  "Is the toilet easily accessible to the following people?: Children" = c(109, 110),
  "Is the toilet easily accessible to the following people?: Persons with dissability" = c(107, 108),
  "Is the toilet easily accessible to the following people?: Women at night" = c(111, 112),
  "Is the toilet easily accessible to the following?: Vacuum Tanker" = c(101, 102),
  "Is the toilet easily accessible to the following?: Light Truck" = c(103, 104),
  "Is the toilet easily accessible to the following?: Push Cart" = c(105, 106)
)
for (question in names(third.toilet.suffix)) {
  suffix <- third.toilet.suffix[[question]]
  test6 <- grouping.columns(
    test6,
    test6[[paste0(question, " - Yes", suffix[1])]],
    test6[[paste0(question, " - No", suffix[2])]],
    question
  )
}
rm(third.toilet.suffix, question, suffix)
The final operation of the data analysis’ first stage is to create a single dataset that unifies all the cases above. The resulting dataset is saved as ‘Kanyama_reduced’, and it is going to be used for a finer tuning of the data aspects.
# Stack the first-, second- and third-toilet tables into one dataset.
# The column names are aligned by position, so the three tables must have the
# same number of columns; fail early instead of silently mislabelling them.
stopifnot(
  ncol(test3) == ncol(Kanyama.reduced),
  ncol(test6) == ncol(Kanyama.reduced)
)
names(test3) <- names(Kanyama.reduced)
names(test6) <- names(Kanyama.reduced)
Kanyama.final <- rbind(Kanyama.reduced, test3, test6)

# Persist the unified dataset for the second part of the report.
write.csv(Kanyama.final, file = "Kanyama_reduced.csv", row.names = FALSE)
The second part of this report is focused on doing a more specific cleaning on the dataset, now removing some “non-obvious-removable” attributes, and turning the dataset into a more generic structure.
# Reload the dataset written at the end of the first part. read.csv() has
# turned the spaces and punctuation of the column names into dots.
Kanyama <- read.csv("Kanyama_reduced.csv")

# Drop the attributes that are not used in the following steps.
Kanyama <- select(
  Kanyama,
  -Are.you.willing.to.participate.,
  -Record_plot_number,
  -Where.do.you.dispose.your.solid.wastes...Other..please.specify.....specify,
  -How.much.did.you.pay.for.the.upgrades.in.ZMW.,
  -How.did.you.finance.for.the.upgrades.,
  -How.did.you.finance.for.the.upgrades...Other..please.specify.....specify,
  -X3.7.2..Other..please.specify.....specify,
  -Condition.of.the.components..Roof...Score.Condition,
  -Condition.of.the.components..Wall...Score.Condition,
  -Condition.of.the.components..Floor...Score.Condition,
  -DATE.OF.INTERVIEW..Time.Answered.
)
The code above reads the csv generated in the report’s first part and removes some attributes that were non-essential for the next steps.
P.S.: The selection of these categories is based on a personal point of view, and any changes are welcome.
The next part is to turn some attributes, from a text answer to a categorical answer, helping in plot schemes where a limited number of categories is needed.
The code below, transform any kind of text answer into a ‘True’ or ‘False’ approach, if there are some words like “No”, “None” or or numbers like ‘0’ it is set a “False” value, else it is set a “True” value.
# Derive a logical flag from the free-text answer about space for another
# toilet. Rules are evaluated top to bottom and the first match wins.
Kanyama <- Kanyama %>%
  mutate(
    Enough.space.another.toilet = case_when(
      # Missing answer, or any text containing "No" (which also covers "None").
      is.na(X1.6.1.Do.you.think.there.is.space.on.this.plot.to.construct.another.toilet...Yes...If.Yes.how.many.more..If.No..why.is.that.the.case.) |
        grepl("No", X1.6.1.Do.you.think.there.is.space.on.this.plot.to.construct.another.toilet...Yes...If.Yes.how.many.more..If.No..why.is.that.the.case.) ~ FALSE,
      # A digit followed by 0 (e.g. "10") is a non-zero count.
      grepl("\\d0", X1.6.1.Do.you.think.there.is.space.on.this.plot.to.construct.another.toilet...Yes...If.Yes.how.many.more..If.No..why.is.that.the.case.) ~ TRUE,
      # Any remaining "0", or the word "Zero", means there is no space.
      grepl("0|Zero", X1.6.1.Do.you.think.there.is.space.on.this.plot.to.construct.another.toilet...Yes...If.Yes.how.many.more..If.No..why.is.that.the.case.) ~ FALSE,
      # Every other answer counts as "there is space".
      TRUE ~ TRUE
    )
  )
The next chunk of code changes the numeric notation into a categorical, textual one and groups the answers where multiple options were given, again to make the plotting steps easier.
# Recode the "what happens when the toilet gets full" answer from its numeric
# code to a text label. Any other non-missing value is treated as a
# multiple-option answer.
Kanyama <- Kanyama %>%
  mutate(
    What.happens.when.the.toilet.gets.full. = case_when(
      # Keep missing answers missing: without this guard `NA == "1"` is NA, so
      # they would fall through to the catch-all and be counted as
      # "Multiple options".
      is.na(What.happens.when.the.toilet.gets.full.) ~ NA_character_,
      What.happens.when.the.toilet.gets.full. == "1" ~ "Bury and dig another one",
      What.happens.when.the.toilet.gets.full. == "2" ~ "Empty and reuse",
      What.happens.when.the.toilet.gets.full. == "3" ~ "Abandone",
      What.happens.when.the.toilet.gets.full. == "Other (please specify)" ~ "Other (please specify)",
      TRUE ~ "Multiple options"
    )
  )
The block of code below shows the unification of the month and year fields from the question asking when the toilet will next be due for emptying. The new column has an X.Y structure, where X represents the years and Y the months. Also, missing values (NA) were replaced by zeroes (they can also be turned back into NA later).
P.S.: this transformation needs a bit more treatment for cases like X.10 to X.12 and for situations where the answer was given entirely in months instead of years and months (e.g. 30 months instead of 2 years and 6 months).
# Build the "years.months" string for the next emptying and replace every "NA"
# produced by paste() for missing values with "0".
Kanyama <- Kanyama %>%
  mutate(
    Toilet.emptying.time = gsub(
      "NA",
      "0",
      paste(
        When.next.do.you.think.your.toilet.will.be.due.for.emptying...Years.from.now...Period,
        When.next.do.you.think.your.toilet.will.be.due.for.emptying...Months.from.now...Period,
        sep = "."
      )
    )
  )
This part transforms the numeric information about the toilets into categorical information and groups the multiple answers into the “Multiple choice” category.
# Recode the toilet interface code (1-6) to its text label. Missing values stay
# missing; any other value is treated as a multiple-choice answer.
Kanyama <- Kanyama %>%
  mutate(
    Interface.Layout = case_when(
      is.na(Interface.Layout) ~ NA_character_,
      Interface.Layout == "1" ~ "Sit down toilet with manual (hand) flushing system",
      Interface.Layout == "2" ~ "Sit down toilet with pour (bucket) flushing system",
      Interface.Layout == "3" ~ "Squat with pour (bucket) flushing system",
      Interface.Layout == "4" ~ "Squat hole (Dry toilet)",
      Interface.Layout == "5" ~ "Urine Diversion Toilet",
      Interface.Layout == "6" ~ "Other",
      TRUE ~ "Multiple choice"
    )
  )
The next part unifies the people’s gender categories (Adult/Male, Adult/Female, Children/Male, Children/Female) into just “Children” and “Adult” categories, and also creates a more general attribute containing everyone who uses the toilet.
# Row-wise totals of toilet users: adults, children, and everyone together.
# Missing counts are ignored in each sum (na.rm = TRUE).
Kanyama <- Kanyama %>%
  mutate(
    adults.using.toilet = rowSums(
      select(
        Kanyama,
        How.many.people.use.the.toilets.on.this.plot...Adults...Female,
        How.many.people.use.the.toilets.on.this.plot...Adults...Male
      ),
      na.rm = TRUE
    ),
    children.using.toilet = rowSums(
      select(
        Kanyama,
        How.many.people.use.the.toilets.on.this.plot...Children...Male,
        How.many.people.use.the.toilets.on.this.plot...Children...Female
      ),
      na.rm = TRUE
    ),
    people.using.toilet = rowSums(
      select(
        Kanyama,
        How.many.people.use.the.toilets.on.this.plot...Children...Male,
        How.many.people.use.the.toilets.on.this.plot...Children...Female,
        How.many.people.use.the.toilets.on.this.plot...Adults...Male,
        How.many.people.use.the.toilets.on.this.plot...Adults...Female
      ),
      na.rm = TRUE
    )
  )
This chunk just counts the total number of toilets, something essential that was missing in the original dataset. Of course, the individual numbers of each type of toilet are important and can be included at any time.
# Total toilets per row: sum of every column from VIP.toilets through
# Disused.Buried, ignoring missing values.
Kanyama <- Kanyama %>%
  mutate(
    Total.number.of.toilets.0. = rowSums(
      select(Kanyama, VIP.toilets:Disused.Buried),
      na.rm = TRUE
    )
  )
Removing more attributes.
# Drop the free-text space question (already summarised as a flag above), the
# block of X1.9 columns from administrative.region through
# Location.Answered...altitude, and the height-reading explanation.
Kanyama <- Kanyama %>%
  select(-c(
    X1.6.1.Do.you.think.there.is.space.on.this.plot.to.construct.another.toilet...Yes...If.Yes.how.many.more..If.No..why.is.that.the.case.,
    X1.9..administrative.region.:X1.9..Location.Answered...altitude.,
    Explain.reason.for.not.being.able.to.take.the.reading.of.the.height.
  ))
The answers where multiple options were given were grouped, creating the category “Multiple Choices”.
# Normalise the solid-waste answer: the literal "null" becomes NA, any answer
# containing a comma is collapsed to "Multiple Choices", and everything else is
# kept as text.
Kanyama <- Kanyama %>%
  mutate(
    Where.do.you.dispose.your.solid.wastes. = case_when(
      Where.do.you.dispose.your.solid.wastes. == "null" ~ NA_character_,
      grepl(",", Where.do.you.dispose.your.solid.wastes.) ~ "Multiple Choices",
      TRUE ~ as.character(Where.do.you.dispose.your.solid.wastes.)
    )
  )
The final part is just dataset organization: putting similar attributes next to each other and saving the result into a brand new CSV file.
# Pick and order the attributes used for plotting, keeping related attributes
# next to each other.
Kanyama_essential <- Kanyama %>%
  select(
    DATE.OF.INTERVIEW,
    RECORD.TYPE.OF.PROPERTY,
    Region,
    People_on_the_plot,
    Landlord.live.in.the.plot.,
    Total.number.of.toilets.0.,
    Enough.space.another.toilet,
    Water.source..fetch.,
    X1.9..latitude.,
    X1.9..longitude.,
    adults.using.toilet,
    children.using.toilet,
    people.using.toilet,
    Where.do.you.dispose.your.solid.wastes.,
    starts_with("Age.of.toilet"),
    What.happens.when.the.toilet.gets.full.,
    Toilet.emptying.time,
    Interface.Layout,
    CONTAINMENT.SUBSTRUCTURE,
    Record.the.observed.shape.of.the.substructure.containment.,
    Width:Height,
    Perception.of.the.fill.level:Is.the.toilet.easily.accessible.to.the.following...Push.Cart
  )
# Save both the full cleaned dataset and the reduced plotting dataset.
write.csv(Kanyama, file = "Kanyama_organized.csv", row.names = FALSE)
write.csv(Kanyama_essential, file = "Kanyama_to_plot.csv", row.names = FALSE)
After the whole cleaning and organizing part, this step shows some results of what can be done with the resulting dataset. A more organized dataset helps the data analyst to focus on the visualization and algorithms and avoids the confusion of dealing with too many attributes, many of them with little or no importance for the final result.
The two datasets created are used depending on the kind of visualization:
Kanyama.plot: used when the location (latitude, longitude) is not needed; it includes the cases that have NA’s in the location.
Kanyama.valid: used when the location is necessary; it ignores the cases where there is no latitude or longitude.
# Plotting dataset as saved by the cleaning stage.
Kanyama.plot <- read.csv("Kanyama_to_plot.csv")
# Subset with usable coordinates: both latitude and longitude present, and
# latitude greater than -20.
Kanyama.valid <- Kanyama.plot %>%
  filter(
    !is.na(X1.9..latitude.),
    !is.na(X1.9..longitude.),
    X1.9..latitude. > -20
  )
Structure:
# Map of the located plots, coloured by toilet interface type and labelled with
# the water source. Missing interface types are drawn in black.
pal <- colorFactor(
  palette = "Accent",
  domain = Kanyama.valid$Interface.Layout,
  na.color = "#000000"
)
plot <- leaflet(data = Kanyama.valid) %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addCircleMarkers(
    lng = ~X1.9..longitude.,
    lat = ~X1.9..latitude.,
    color = ~pal(Interface.Layout),
    label = ~Water.source..fetch.,
    radius = 10,
    stroke = FALSE,
    fillOpacity = 0.5
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = ~Interface.Layout,
    title = "Type of toilet",
    na.label = "Not Available"
  )
plot
Structure:
# Count and percentage (one decimal) of rows per toilet interface type.
type.of.toilet <- Kanyama.plot %>%
  group_by(Interface.Layout) %>%
  summarise(
    percentage = round(n()/nrow(Kanyama.plot) * 100, 1),
    count = n()
  )
## Warning: Factor `Interface.Layout` contains implicit NA, consider using
## `forcats::fct_explicit_na`
#Percentage of toilets ----
# Bar chart of the percentages, with the value printed above each bar; the
# x-axis text is hidden because the fill legend already names the categories.
plot_percentage <- ggplot(
  data = type.of.toilet,
  aes(x = Interface.Layout, y = percentage, fill = Interface.Layout)
) +
  geom_col() +
  theme_light() +
  geom_text(aes(label = percentage), vjust = -0.3, size = 3.5) +
  theme(axis.text.x = element_blank(), axis.title.x = element_blank()) +
  labs(
    title = "Percentage of toilets' type",
    subtitle = "Kanyama",
    fill = "Toilet type",
    y = "Percentage"
  )
plot_percentage
The structure is similar to the percentage chart, but now showing the total amount of toilets per type.
# Bar chart with the number of rows of Kanyama.plot per toilet interface type.
# geom_bar() counts the rows of each category itself; the previous
# geom_histogram(stat = 'count') drew the same bars but is meant for binned
# continuous data and warned about ignored binwidth/bins/pad parameters.
plot_count <- ggplot(
  data = Kanyama.plot,
  aes(x = Interface.Layout, fill = Interface.Layout)
) +
  geom_bar() +
  theme_light() +
  # The fill legend names the categories, so the x-axis text is hidden.
  theme(axis.title.x = element_blank(), axis.text.x = element_blank()) +
  labs(
    title = "Quantity of toilets in Kanyama",
    subtitle = paste("Based on", nrow(Kanyama.plot), "plots"),
    fill = "Toilet type",
    y = "Number of toilets"
  )
plot_count
# Number of rows per water source.
type.of.water <- Kanyama.plot %>%
  group_by(Water.source..fetch.) %>%
  summarise(count = n())
## Warning: Factor `Water.source..fetch.` contains implicit NA, consider using
## `forcats::fct_explicit_na`
# Pie chart of the water sources; grid, zero line and tick labels are switched
# off on both axes.
plot_water <- plot_ly(
  type.of.water,
  labels = ~Water.source..fetch.,
  values = ~count,
  type = "pie"
) %>%
  layout(
    title = "Water Source in Kanyama",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
plot_water
In the next group of chunks, the maps show which plots are accessible to each kind of transport or group of people: red points indicate that the place is not safe or accessible, and green points indicate that the plot is accessible or safe.
#Accessibility maps ----
# Two-colour palette shared by all accessibility maps. Its domain is taken from
# the Vacuum Tanker column; missing answers are drawn in black.
pal_bool <- colorFactor(
  c("red", "green"),
  domain = Kanyama.valid$Is.the.toilet.easily.accessible.to.the.following...Vacuum.Tanker,
  na.color = "#000000"
)

# Build one accessibility map from Kanyama.valid.
#   column:       name (character string) of the accessibility column whose
#                 values colour the markers and feed the legend
#   legend_title: title displayed above the legend
# Returns the leaflet map; markers are labelled with the water source.
accessibility_map <- function(column, legend_title) {
  answers <- Kanyama.valid[[column]]
  leaflet(data = Kanyama.valid) %>%
    addProviderTiles(providers$Esri.WorldImagery) %>%
    addCircleMarkers(
      lng = ~X1.9..longitude.,
      lat = ~X1.9..latitude.,
      color = pal_bool(answers),
      label = ~Water.source..fetch.,
      radius = 1,
      fillOpacity = 0.3,
      stroke = TRUE
    ) %>%
    addLegend(
      "bottomright",
      pal = pal_bool,
      values = answers,
      title = legend_title,
      na.label = "Not Available"
    )
}

#Vacuum Tanker
plot_Vacuum <- accessibility_map(
  "Is.the.toilet.easily.accessible.to.the.following...Vacuum.Tanker",
  "Available for the Vacuum Tanker"
)
plot_Vacuum
#Light Truck
plot_Truck <- accessibility_map(
  "Is.the.toilet.easily.accessible.to.the.following...Light.Truck",
  "Available for the Light Truck"
)
plot_Truck
#Push Cart
plot_Cart <- accessibility_map(
  "Is.the.toilet.easily.accessible.to.the.following...Push.Cart",
  "Available for the Push Cart"
)
plot_Cart
#Children
plot_Children <- accessibility_map(
  "Is.the.toilet.easily.accessible.to.the.following.people...Children",
  "Available for Children"
)
plot_Children
#Women at night
plot_Women <- accessibility_map(
  "Is.the.toilet.easily.accessible.to.the.following.people...Women.at.night",
  "Available for women at night"
)
plot_Women
#Persons with disability (the column name keeps the dataset's own spelling)
plot_dis <- accessibility_map(
  "Is.the.toilet.easily.accessible.to.the.following.people...Persons.with.dissability",
  "Available for people with disability"
)
plot_dis
In this plot, some places were removed because they are too far away from Kanyama. After that, the number of people was categorized into: up to 5, up to 10, up to 20, up to 50, and more than 50 people.
Also, places that have no people living on their plots (e.g. churches, schools, markets, etc.) were removed.
#People per plot (Average) ----
# Keep only the points with longitude below 28.275.
filter_outside <- Kanyama.valid %>% filter(X1.9..longitude. < 28.275)
# Bin the number of people on each plot into five categories; include.lowest
# puts a value of exactly 0 into the first bin.
filter_outside$fact_people_per_plot <- cut(
  filter_outside$People_on_the_plot,
  breaks = c(0, 5, 10, 20, 50, 50000),
  labels = c("Up to 5 people", "Up to 10 people", "Up to 20 people", "Up to 50 people", "More than 50 people"),
  include.lowest = TRUE
)
# Rows without a people count are left out of this map.
people_per_plot <- filter_outside %>% filter(!is.na(People_on_the_plot))
people_pal <- colorFactor(
  "Spectral",
  domain = people_per_plot$fact_people_per_plot,
  na.color = "#000000"
)
plot_avg_people <- leaflet(data = people_per_plot) %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addCircleMarkers(
    lng = ~X1.9..longitude.,
    lat = ~X1.9..latitude.,
    color = ~people_pal(fact_people_per_plot),
    label = ~Water.source..fetch.,
    radius = 5,
    opacity = 0.5,
    stroke = TRUE
  ) %>%
  addLegend(
    "bottomright",
    pal = people_pal,
    values = ~fact_people_per_plot,
    # The legend shows the people-per-plot bins; the former title
    # "Toilet per people" was copied from the toilet-ratio map.
    title = "People per plot",
    na.label = "Not Available"
  )
plot_avg_people
For this plot, new fields were created that relate people per toilet and toilets per people, treating separately the cases where the number of people or toilets is 0 to avoid a division by zero.
Also, the results were sliced in a similar way to the people per plot:
People per toilet
Toilet per people
# Ratios between toilet users and toilets. When the denominator is 0 the ratio
# is undefined, so it is stored as NA. Storing 0 instead would make cut() below
# (include.lowest = TRUE) file those rows under the first bin ("1 person" /
# "Up to 1 toilets") as if they were real measurements.
filter_outside <- filter_outside %>%
  mutate(
    people_per_toilet = case_when(
      Total.number.of.toilets.0. != 0 ~ people.using.toilet / Total.number.of.toilets.0.,
      TRUE ~ NA_real_
    ),
    toilet_per_people = case_when(
      people.using.toilet != 0 ~ Total.number.of.toilets.0. / people.using.toilet,
      TRUE ~ NA_real_
    )
  )
# Bin both ratios into labelled categories; NA ratios stay NA.
filter_outside$fact_ppl_toilet <- cut(
  filter_outside$people_per_toilet,
  breaks = c(0, 1, 5, 10, 20, 50, 100, 5000),
  labels = c("1 person", "Up to 5 people", "Up to 10 people", "Up to 20 people", "Up to 50 people", "Up to 100 people", "More than 100 people"),
  include.lowest = TRUE
)
filter_outside$fact_toilet_ppl <- cut(
  filter_outside$toilet_per_people,
  breaks = c(0, 1, 2, 3, 50),
  labels = c("Up to 1 toilets", "Up to 2 toilets", "Up to 3 toilets", "More than 3 toilets"),
  include.lowest = TRUE
)
# One palette per ratio category column; missing categories are drawn in black.
cont_pal <- colorFactor(
  palette = "Set1",
  domain = filter_outside$fact_toilet_ppl,
  na.color = "#000000"
)
cont_pal2 <- colorFactor(
  palette = "Set1",
  domain = filter_outside$fact_ppl_toilet,
  na.color = "#000000"
)
# Map coloured by the toilets-per-person category.
plot_toilet_ppl <- leaflet(data = filter_outside) %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addCircleMarkers(
    lng = ~X1.9..longitude.,
    lat = ~X1.9..latitude.,
    color = ~cont_pal(fact_toilet_ppl),
    label = ~Water.source..fetch.,
    radius = 5,
    opacity = 0.5,
    stroke = TRUE
  ) %>%
  addLegend(
    position = "bottomright",
    pal = cont_pal,
    values = ~fact_toilet_ppl,
    title = "Toilet per people",
    na.label = "Not Available"
  )
# Map coloured by the people-per-toilet category.
plot_ppl_toilet <- leaflet(data = filter_outside) %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addCircleMarkers(
    lng = ~X1.9..longitude.,
    lat = ~X1.9..latitude.,
    color = ~cont_pal2(fact_ppl_toilet),
    label = ~Water.source..fetch.,
    radius = 5,
    opacity = 0.5,
    stroke = TRUE
  ) %>%
  addLegend(
    position = "bottomright",
    pal = cont_pal2,
    values = ~fact_ppl_toilet,
    title = "People per toilet",
    na.label = "Not Available"
  )
plot_ppl_toilet
plot_toilet_ppl
Structure:
#Solid Waste Disposal ----
# Number of rows per solid-waste disposal answer.
Waste.type <- Kanyama.plot %>%
  group_by(Where.do.you.dispose.your.solid.wastes.) %>%
  summarise(count = n())
## Warning: Factor `Where.do.you.dispose.your.solid.wastes.` contains implicit
## NA, consider using `forcats::fct_explicit_na`
# Pie chart of the disposal answers with white text inside the slices.
plot_waste <- plot_ly(
  Waste.type,
  labels = ~Where.do.you.dispose.your.solid.wastes.,
  values = ~count,
  type = "pie",
  insidetextfont = list(color = "#FFFFFF")
) %>%
  layout(
    title = "Solid Waste Disposal in Kanyama",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
plot_waste
Structure: 1. X axis: Landlord lives in the plot? (Yes or No) 2. Y axis: Count of plots 3. Fill: Landlord lives in the plot? 4. Type of chart: Bar
#Landlord live on the plot? ----
# Number of rows per landlord answer, leaving out missing answers.
Landlord <- Kanyama.plot %>%
  filter(!is.na(Landlord.live.in.the.plot.)) %>%
  group_by(Landlord.live.in.the.plot.) %>%
  summarise(count = n())
# Bar chart of those counts with the value printed above each bar; the visible
# y range is fixed to 0-10000.
ggplot(
  data = Landlord,
  aes(x = Landlord.live.in.the.plot., y = count, fill = Landlord.live.in.the.plot.)
) +
  geom_col() +
  geom_text(aes(label = count), vjust = -0.3) +
  theme_bw() +
  labs(
    title = "Landlord live in the plot?",
    subtitle = " Plots in Kanyama",
    x = element_blank(),
    y = element_blank(),
    fill = "Landlord live in the plot?"
  ) +
  theme(panel.grid = element_blank()) +
  coord_cartesian(ylim = c(0, 10000))
Structure:
# Histogram of the perceived fill level, with bars split by whether the
# landlord lives on the plot.
plot_qualityxlandlord <- ggplot(
  data = Kanyama.plot,
  aes(x = Perception.of.the.fill.level, fill = Landlord.live.in.the.plot.)
) +
  geom_histogram() +
  theme_light() +
  labs(
    x = "Perception of the fill level",
    y = "Number of toilets",
    fill = "Does the landlord live in the plot?"
  )
plot_qualityxlandlord
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 982 rows containing non-finite values (stat_bin).
Structure
# Horizontal bar chart: number of rows per "toilet gets full" answer, split by
# whether the landlord lives on the plot. Rows missing either answer are left
# out. geom_bar() counts the rows per category itself; the previous
# geom_histogram(stat = "count") drew the same bars but warned about ignored
# binwidth/bins/pad parameters.
plot_disposalxlandlord <- ggplot(
  data = filter(
    Kanyama.plot,
    !is.na(What.happens.when.the.toilet.gets.full.) & !is.na(Landlord.live.in.the.plot.)
  ),
  aes(x = What.happens.when.the.toilet.gets.full., fill = Landlord.live.in.the.plot.)
) +
  geom_bar() +
  theme_light() +
  labs(
    x = element_blank(),
    y = "Number of toilets",
    fill = "Does the landlord live in the plot?",
    title = "Full toilet Vs Landlord lives in the place"
  ) +
  theme(axis.text.x = element_text(angle = 90)) +
  coord_flip()
plot_disposalxlandlord
Structure: 1. Y axis: Count of toilets 2. X axis: Type of toilet 3. Fill: Landlord lives in the plot? 4. Type of Chart: Histogram
# Horizontal bar chart: number of rows per toilet interface type, split by
# whether the landlord lives on the plot. geom_bar() replaces
# geom_histogram(stat = "count"), which drew the same bars but warned about
# ignored binwidth/bins/pad parameters.
plot_typexlandlord <- ggplot(
  data = Kanyama.plot,
  aes(x = Interface.Layout, fill = Landlord.live.in.the.plot.)
) +
  geom_bar() +
  coord_flip()
plot_typexlandlord
Structure: 1. X axis: Landlord lives in the plot 2. Y axis: Average time to empty the toilet 3. Fill: Landlord lives in the plot 4. Type of chart: Bar
# Mean of the emptying-time column (converted from text to numeric) per
# landlord answer, leaving out rows where either value is missing. Values that
# cannot be converted become NA and are ignored by the mean.
time.emptyxlandlord <- Kanyama.plot %>%
  filter(
    !is.na(Landlord.live.in.the.plot.),
    !is.na(Toilet.emptying.time)
  ) %>%
  group_by(Landlord.live.in.the.plot.) %>%
  summarise(
    avg.time = mean(as.numeric(as.character(Toilet.emptying.time)), na.rm = TRUE)
  )
## Warning in mean(as.numeric(as.character(Toilet.emptying.time)), na.rm = T):
## NAs introduced by coercion
## Warning in mean(as.numeric(as.character(Toilet.emptying.time)), na.rm = T):
## NAs introduced by coercion
# Bar chart of the averages with the value printed above each bar; the y axis
# is limited to 0-5.
plot_time.emptyxlandlord <- ggplot(
  data = time.emptyxlandlord,
  aes(x = Landlord.live.in.the.plot., y = avg.time, fill = Landlord.live.in.the.plot.)
) +
  geom_col() +
  geom_text(aes(label = avg.time), vjust = -.5) +
  scale_y_continuous(limits = c(0, 5)) +
  theme_light()
plot_time.emptyxlandlord
Structure: 1. Colour palette: Approach when the toilet gets full 2. Label: Water fetch 3. Type of chart: Map
#Toilet reuse ----
# Map of the located plots coloured by what happens when the toilet gets full,
# labelled with the water source.
pal_disposal <- colorFactor(
  palette = "Set1",
  domain = Kanyama.valid$What.happens.when.the.toilet.gets.full.
)
plot_disposal <- leaflet(data = Kanyama.valid) %>%
  addProviderTiles(providers$Esri.WorldImagery) %>%
  addCircleMarkers(
    lng = ~X1.9..longitude.,
    lat = ~X1.9..latitude.,
    color = ~pal_disposal(What.happens.when.the.toilet.gets.full.),
    label = ~Water.source..fetch.,
    radius = 1,
    fillOpacity = 0.3,
    stroke = TRUE
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal_disposal,
    values = ~What.happens.when.the.toilet.gets.full.,
    title = "What happens when the toilet gets full?",
    na.label = "Not Available"
  )
plot_disposal